library(tidyverse)
library(rlang)
library(lubridate)
library(scales)
library(ggrepel)
library(glue)
library(rvest)
library(pander)
library(plotly)
library(QuantTools)
library(jsonlite)
panderOptions("big.mark", ",")
panderOptions("table.split.table", Inf)
panderOptions("table.style", "rmarkdown")
panderOptions("missing", "")
theme_set(theme_bw())
auStates <- c(
ACT = "Australian Capital Territory",
QLD = "Queensland",
NSW = "New South Wales",
VIC = "Victoria",
SA = "South Australia",
WA = "Western Australia",
NT = "Northern Territory",
TAS = "Tasmania",
AUS = "All States"
)
ausPops <- tribble(
~State, ~Population,
"New South Wales", 8117976,
"Victoria", 6629870,
"Queensland", 5115451,
"South Australia", 1756494,
"Western Australia", 2630557,
"Tasmania", 535500,
"Northern Territory", 245562,
"Australian Capital Territory", 428060
) %>%
bind_rows(
tibble(
State = "All States",
Population = sum(.$Population)
)
)
data <- fromJSON("https://covidlive.com.au/covid-live.json") %>%
as_tibble() %>%
mutate(
across(
.cols = ends_with("CNT"),
.fns = as.numeric
),
REPORT_DATE = ymd(REPORT_DATE)
)
dt <- data %>%
dplyr::filter(
CODE == "AUS", !is.na(LAST_UPDATED_DATE)
) %>%
pull(REPORT_DATE) %>%
max()
International data and figures can be viewed here
Australian State populations were taken from the ABS Website and were accurate in Sept 2019.
data %>%
dplyr::filter(
REPORT_DATE == dt
) %>%
dplyr::mutate(
Increase = CASE_CNT - PREV_CASE_CNT,
`% Increase` = percent(Increase / PREV_CASE_CNT, accuracy = 0.1),
`Fatality Rate` = percent(DEATH_CNT / CASE_CNT, accuracy = 0.1),
`Recovery Rate` = percent(RECOV_CNT / CASE_CNT, accuracy = 0.1),
State = case_when(
CODE == "AUS" ~ "National Total",
TRUE ~ auStates[CODE]
),
State = factor(State, levels = c(dplyr::arrange(ausPops, desc(Population))$State, "National Total"))
) %>%
dplyr::rename(
Fatalities = DEATH_CNT,
Recovered = RECOV_CNT,
`Currently Active` = ACTIVE_CNT
) %>%
dplyr::select(
State,
PREV_CASE_CNT, CASE_CNT,
contains("Increase"),
contains("Fatal"),
contains("Recov", ignore.case = FALSE),
`Currently Active`
) %>%
dplyr::arrange(State) %>%
setNames(
str_replace_all(names(.), "PREV_CASE_CNT", as.character(dt - 1))
) %>%
setNames(
str_replace_all(names(.), "CASE_CNT", as.character(dt))
) %>%
pander(
justify = "lrrrrrrrrr",
caption = paste(
"*Confirmed cases, fatalities and recoveries reported by each state at time of preparation.*"
),
emphasize.strong.rows = nrow(.)
)
| State | 2021-01-21 | 2021-01-22 | Increase | % Increase | Fatalities | Fatality Rate | Recovered | Recovery Rate | Currently Active |
|---|---|---|---|---|---|---|---|---|---|
| New South Wales | 5,084 | 5,083 | -1 | 0.0% | 54 | 1.1% | 3,236 | 63.7% | 78 |
| Victoria | 20,433 | 20,434 | 1 | 0.0% | 820 | 4.0% | 19,583 | 95.8% | 31 |
| Queensland | 1,300 | 1,303 | 3 | 0.2% | 6 | 0.5% | 1,261 | 96.8% | 23 |
| Western Australia | 888 | 890 | 2 | 0.2% | 9 | 1.0% | 869 | 97.6% | 12 |
| South Australia | 596 | 596 | 0 | 0.0% | 4 | 0.7% | 587 | 98.5% | 5 |
| Tasmania | 234 | 234 | 0 | 0.0% | 13 | 5.6% | 221 | 94.4% | 0 |
| Australian Capital Territory | 118 | 118 | 0 | 0.0% | 3 | 2.5% | 115 | 97.5% | 0 |
| Northern Territory | 97 | 97 | 0 | 0.0% | 0 | 0.0% | 89 | 91.8% | 8 |
| National Total | 28,750 | 28,755 | 5 | 0.0% | 909 | 3.2% | 25,961 | 90.3% | 157 |
ausStatsCap <- "*Current confirmed and recovered cases, along with fatalities for Australia only. Active cases are shown as confirmed cases excluding fatalities and those classed as recovered. Some data regarding recovered cases prior to 1^st^ May 2020 may be estimates.*"
ggplotly(
data %>%
dplyr::filter(CODE == "AUS", REPORT_DATE <= dt, REPORT_DATE > "2020-03-01") %>%
dplyr::select(
Date = REPORT_DATE,
Confirmed = CASE_CNT,
Active = ACTIVE_CNT,
Fatal = DEATH_CNT,
Recovered = RECOV_CNT
) %>%
mutate(
Active = case_when(
is.na(Active) ~ Confirmed - Fatal - Recovered,
TRUE ~ Active
)
) %>%
pivot_longer(
cols = c("Active", "Fatal", "Recovered"),
names_to = "Status", values_to = "Total"
) %>%
mutate(
Status = factor(Status, levels = c("Fatal", "Recovered", "Active"))
) %>%
ggplot(aes(Date, Total, fill = Status)) +
geom_col() +
geom_line(
data = . %>%
group_by(Date) %>%
summarise(
Total = sum(Total)
) %>%
mutate(Status = "Confirmed"),
colour = "blue"
) +
scale_fill_manual(
values = c(
Active = rgb(0, 0, 0),
Confirmed = rgb(0, 0.3, 0.7),
Fatal = rgb(0.8, 0.2, 0.2),
Recovered = rgb(0.2, 0.7, 0.4)
)
) +
scale_x_date(expand = expansion(c(0, 0.03))) +
scale_y_continuous(expand = expansion(c(0, 0.05))) +
labs("Total Cases")
)
Current confirmed and recovered cases, along with fatalities for Australia only. Active cases are shown as confirmed cases excluding fatalities and those classed as recovered. Some data regarding recovered cases prior to 1st May 2020 may be estimates.
ggplotly(
data %>%
dplyr::filter(CODE != "AUS", REPORT_DATE <= dt, REPORT_DATE > "2020-03-01") %>%
dplyr::select(
CODE,
Date = REPORT_DATE,
Confirmed = CASE_CNT,
Active = ACTIVE_CNT,
Fatal = DEATH_CNT,
Recovered = RECOV_CNT
) %>%
mutate(
Active = case_when(
is.na(Active) ~ Confirmed - Fatal - Recovered,
TRUE ~ Active
),
State = auStates[CODE]
) %>%
pivot_longer(
cols = c("Active", "Fatal", "Recovered"),
names_to = "Status", values_to = "Total"
) %>%
left_join(ausPops) %>%
mutate(
Status = factor(Status, levels = c("Fatal", "Recovered", "Active")),
Rate = 1e6*Total / Population
) %>%
ggplot(aes(Date, Rate, fill = Status, label = Total)) +
geom_col() +
geom_line(
data = . %>%
group_by(State, Date) %>%
summarise(
Rate = sum(Rate),
Total = sum(Total)
) %>%
mutate(Status = "Confirmed"),
colour = "blue"
) +
facet_wrap(~State, ncol = 4) +
scale_fill_manual(
values = c(
Active = rgb(0, 0, 0),
Confirmed = rgb(0, 0.3, 0.7),
Fatal = rgb(0.8, 0.2, 0.2),
Recovered = rgb(0.2, 0.7, 0.4)
)
) +
scale_x_date(expand = expansion(c(0, 0.03))) +
labs(y = "Rate (Cases / Million)")
)
Breakdown of individual states. Victorian recovered numbers began to be accurately reported from 22nd March, with other states gradually providing this information. NSW/QLD recovered cases have only recently begun being reported and up until the most recent dates, recovered/active values were very approximate for these states. The extreme drop for NSW active cases in early June is a function of the changed reporting strategy implemented by NSW Health.
ggplotly(
data %>%
dplyr::select(
State = CODE, date = REPORT_DATE,
confirmed = CASE_CNT, daily = NEW_CASE_CNT
) %>%
mutate(
State = auStates[State]
) %>%
arrange(State, date) %>%
group_by(State) %>%
mutate(
daily = c(0, diff(confirmed)),
daily = ifelse(daily < 0, 0, daily),
MA = round(sma(daily, 7), 2),
MA2 = round(sma(daily, 14), 2),
`Above Average` = MA > MA2
) %>%
dplyr::filter(date > "2020-03-01") %>%
ggplot(aes(date, daily)) +
geom_col(
aes(fill = `Above Average`, colour = `Above Average`),
data = . %>% dplyr::filter(!is.na(`Above Average`)),
width = 1/2
) +
geom_line(aes(y = MA), colour = "blue") +
geom_line(aes(y = MA2), colour = "black") +
facet_wrap(~State, scales = "free_y") +
labs(
x = "Date",
y = "Daily New Cases",
fill = "\nAbove\nAverage"
) +
scale_fill_manual(values = c("white", rgb(1, 0.2, 0.2))) +
scale_colour_manual(values = c("grey50", rgb(1, 0.2, 0.2))),
tooltip = c(
"date", "daily", "MA"
)
)
Daily new cases for each state shown against the 7-day (blue) and 14-day (black) averages. Days which the 7-day average is above the 14-day average are highlighted in red.
inc <- 6
icu <- 11
d <- 7
offset <- icu + d
minDate <- "2020-04-20"
list(
data %>%
dplyr::filter(CODE == "AUS", REPORT_DATE > minDate) %>%
dplyr::select(date = REPORT_DATE, confirmed = CASE_CNT, deaths = DEATH_CNT) %>%
arrange(date) %>%
mutate(fr = deaths / confirmed, type = "No Offset"),
data %>%
dplyr::filter(CODE == "AUS") %>%
dplyr::select(date = REPORT_DATE, confirmed = CASE_CNT, deaths = DEATH_CNT) %>%
arrange(date) %>%
mutate(
confirmed = c(rep(NA, offset), confirmed[seq_len(nrow(.) - offset)]),
fr = deaths / confirmed,
type = glue("Offset ({offset} days)")
) %>%
dplyr::filter(date > minDate)
) %>%
bind_rows() %>%
ggplot(
aes(date, fr, colour = type)
) +
geom_line() +
scale_x_date(
expand = expansion(mult = 0, add = 20)
) +
scale_y_continuous(label = percent) +
labs(
x = "Date",
y = "Estimated Fatality Rate",
colour = "Calculation"
)
Fatality rate for Australian cases as calculated using two methods. Where no offset is included, the rate shown is simply the number of fatalities divided by the total number of reported cases on the same date. When cases increase during a new outbreak, this will skew the fatality rate lower. An alternative is to use an offset based on the fact the the median time from infection to symptom onset is 6 days, the median time from symptom onset to ICU admission is 11 days, and the median time from ICU admission to mortality is 7 days. When using the offset, the fatality rate is calculated as the number of recorded fatalities on a given date, divided by by the number of cases from 18 days ago. Whilst still flawed this may give a less biased estimate on the true fatality rate, and importantly, will always be higher than the alternative calculation. The intial fatality rate spiked above 30% during the intial outbreak under the offset approach, and as such, data is only shown after 20 Apr, 2020. All times used for estimation the offset were obtained from here
n <- 14
minCases <- 1
cp <- glue(
"*Growth factor for each State/Territory.
__Values are calculated using only locally-acquired cases__.
In order to try and minimise volatility a {n} day simple moving average was used, in contrast to the 5 day average as advocated [here](https://www.abc.net.au/news/2020-04-10/coronavirus-data-australia-growth-factor-covid-19/12132478).
This enables assessment of the growth factor over an entire quarantine period.
This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak.
As a result, values are only shown when the {n}-day average of new __locally acquired cases__ exceeds {minCases}.*"
)
gf <- data %>%
mutate(
local = CASE_CNT - SRC_OVERSEAS_CNT,
State = auStates[CODE]
) %>%
dplyr::select(State, date = REPORT_DATE, local) %>%
arrange(date) %>%
dplyr::filter(
date >= "2020-03-28",
date <= dt
) %>%
group_by(State) %>%
mutate(
new = c(0, diff(local)),
new_ma = sma(new, n)
) %>%
dplyr::filter(local > 0, !is.na(new_ma)) %>%
mutate(
R = c(NA, new_ma[-1] / new_ma[-n()]),
R = case_when(
is.nan(R) ~ NA_real_,
new_ma < minCases ~ NA_real_,
TRUE ~ R
)
) %>%
ungroup() %>%
arrange(State) %>%
ggplot(aes(date, R, colour = State)) +
geom_ribbon(aes(ymin = 1, ymax = R), alpha = 0.1) +
geom_hline(yintercept = 1) +
geom_label(
aes(label = R),
data = . %>%
dplyr::filter(date == max(date), !is.na(R)) %>%
mutate(R = round(R, 2), date = date + 1),
fill = rgb(1, 1, 1, 0.3),
show.legend = FALSE,
nudge_y = 0.3,
size = 4
) +
labs(
x = "Date", y = "Growth Factor"
) +
facet_wrap(~State, scales = "free_x") +
theme(legend.position = "none") +
coord_cartesian(ylim = c(0.5, 1.8))
gf
Growth factor for each State/Territory. Values are calculated using only locally-acquired cases. In order to try and minimise volatility a 14 day simple moving average was used, in contrast to the 5 day average as advocated here. This enables assessment of the growth factor over an entire quarantine period. This value becomes volatile when daily new cases approach zero as is commonly observed in small populations, and at the end stages of an outbreak. As a result, values are only shown when the 14-day average of new locally acquired cases exceeds 1.
The current 14 day growth factor is 0.79 which gives some degree of confidence that the spread of infections is relatively under control.
# tested %>%
# left_join(confirmed, by = c("State", "Country", "date") ) %>%
data %>%
dplyr::filter(REPORT_DATE == dt) %>%
mutate(
State = auStates[CODE],
TEST_CNT = case_when(
is.na(TEST_CNT) ~ PREV_TEST_CNT,
!is.na(TEST_CNT) ~ TEST_CNT
)
) %>%
dplyr::select(date = REPORT_DATE, State, tests = TEST_CNT, confirmed = CASE_CNT) %>%
left_join(ausPops, by = "State") %>%
mutate(
`Tests / '000` = round(1e3 * tests / Population, 2),
Positive = confirmed / tests,
Negative = 1 - Positive,
isTotal = grepl("Total", State),
State = str_replace(State, "All States", "National Total")
) %>%
dplyr::select(
State, Population,
Confirmed = confirmed,
Tests = tests,
contains("000"),
ends_with("ive"),
isTotal
) %>%
arrange(isTotal, desc(`Tests / '000`)) %>%
dplyr::select(-isTotal) %>%
dplyr::rename(
`% Positive Tests` = Positive,
`% Negative Tests` = Negative
) %>%
mutate_at(
vars(starts_with("%")), percent, accuracy = 0.01
) %>%
split(f = .$State == "National Total") %>%
bind_rows() %>%
pander(
justify = "lrrrrrr",
missing = "",
caption = glue(
"*COVID-19 testing scaled by state population size.
Confirmed cases are assumed to be the tests returning a positive result.
The current numbers available for some states are a lower limit, and as such, the proportion of the population tested is likely to be higher, as is the proportion of tests returning a negative result.*"
),
emphasize.strong.rows = nrow(.)
)
| State | Population | Confirmed | Tests | Tests / '000 | % Positive Tests | % Negative Tests |
|---|---|---|---|---|---|---|
| Victoria | 6,629,870 | 20,434 | 4,338,030 | 654.3 | 0.47% | 99.53% |
| New South Wales | 8,117,976 | 5,083 | 4,541,913 | 559.5 | 0.11% | 99.89% |
| South Australia | 1,756,494 | 596 | 935,333 | 532.5 | 0.06% | 99.94% |
| Northern Territory | 245,562 | 97 | 95,738 | 389.9 | 0.10% | 99.90% |
| Australian Capital Territory | 428,060 | 118 | 153,455 | 358.5 | 0.08% | 99.92% |
| Queensland | 5,115,451 | 1,303 | 1,715,747 | 335.4 | 0.08% | 99.92% |
| Tasmania | 535,500 | 234 | 154,963 | 289.4 | 0.15% | 99.85% |
| Western Australia | 2,630,557 | 890 | 707,133 | 268.8 | 0.13% | 99.87% |
| National Total | 25,459,470 | 28,755 | 12,642,312 | 496.6 | 0.23% | 99.77% |
R version 4.0.3 (2020-10-10)
**Platform:** x86_64-pc-linux-gnu (64-bit)
locale: LC_CTYPE=C, LC_NUMERIC=C, LC_TIME=C, LC_COLLATE=C, LC_MONETARY=C, LC_MESSAGES=en_AU.UTF-8, LC_PAPER=en_AU.UTF-8, LC_NAME=C, LC_ADDRESS=C, LC_TELEPHONE=C, LC_MEASUREMENT=en_AU.UTF-8 and LC_IDENTIFICATION=C
attached base packages: stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: jsonlite(v.1.7.2), QuantTools(v.0.5.7.1), data.table(v.1.13.6), plotly(v.4.9.3), pander(v.0.6.3), rvest(v.0.3.6), xml2(v.1.3.2), glue(v.1.4.2), ggrepel(v.0.9.0), scales(v.1.1.1), lubridate(v.1.7.9.2), rlang(v.0.4.10), forcats(v.0.5.0), stringr(v.1.4.0), dplyr(v.1.0.2), purrr(v.0.3.4), readr(v.1.4.0), tidyr(v.1.1.2), tibble(v.3.0.4), ggplot2(v.3.3.3) and tidyverse(v.1.3.0)
loaded via a namespace (and not attached): Rcpp(v.1.0.5), ps(v.1.5.0), assertthat(v.0.2.1), digest(v.0.6.27), R6(v.2.5.0), cellranger(v.1.1.0), backports(v.1.2.1), reprex(v.0.3.0), evaluate(v.0.14), highr(v.0.8), httr(v.1.4.2), pillar(v.1.4.7), lazyeval(v.0.2.2), curl(v.4.3), readxl(v.1.3.1), rstudioapi(v.0.13), rmarkdown(v.2.6), labeling(v.0.4.2), htmlwidgets(v.1.5.3), munsell(v.0.5.0), broom(v.0.7.3), compiler(v.4.0.3), modelr(v.0.1.8), xfun(v.0.20), pkgconfig(v.2.0.3), htmltools(v.0.5.0), tidyselect(v.1.1.0), fasttime(v.1.0-2), fansi(v.0.4.1), viridisLite(v.0.3.0), crayon(v.1.3.4), dbplyr(v.2.0.0), withr(v.2.3.0), grid(v.4.0.3), gtable(v.0.3.0), lifecycle(v.0.2.0), DBI(v.1.1.0), magrittr(v.2.0.1), cli(v.2.2.0), stringi(v.1.5.3), farver(v.2.0.3), fs(v.1.5.0), ellipsis(v.0.3.1), generics(v.0.1.0), vctrs(v.0.3.6), tools(v.4.0.3), hms(v.0.5.3), crosstalk(v.1.1.0.1), yaml(v.2.2.1), colorspace(v.2.0-0), knitr(v.1.30) and haven(v.2.3.1)